PROYECTO FINAL¶

i. Ordenamiento de data¶

i.i. Apertura de data¶

In [84]:
import pandas as pd
import altair as alt
alt.data_transformers.enable("vegafusion")
!pip install pyreadstat
!pip install wbgapi
!pip install "vegafusion[embed]>=1.5.0"
!pip install "vl-convert-python>=1.6.0"
Requirement already satisfied: pyreadstat in /usr/local/lib/python3.12/dist-packages (1.3.2)
Requirement already satisfied: narwhals>=2.0 in /usr/local/lib/python3.12/dist-packages (from pyreadstat) (2.15.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from pyreadstat) (2.0.2)
Requirement already satisfied: wbgapi in /usr/local/lib/python3.12/dist-packages (1.0.12)
Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from wbgapi) (2.32.4)
Requirement already satisfied: PyYAML in /usr/local/lib/python3.12/dist-packages (from wbgapi) (6.0.3)
Requirement already satisfied: tabulate in /usr/local/lib/python3.12/dist-packages (from wbgapi) (0.9.0)
Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->wbgapi) (3.4.4)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->wbgapi) (3.11)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->wbgapi) (2.5.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->wbgapi) (2026.1.4)
Requirement already satisfied: vegafusion>=1.5.0 in /usr/local/lib/python3.12/dist-packages (from vegafusion[embed]>=1.5.0) (2.0.3)
Requirement already satisfied: arro3-core in /usr/local/lib/python3.12/dist-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (0.6.5)
Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (25.0)
Requirement already satisfied: narwhals>=1.42 in /usr/local/lib/python3.12/dist-packages (from vegafusion>=1.5.0->vegafusion[embed]>=1.5.0) (2.15.0)
Requirement already satisfied: vl-convert-python>=1.6.0 in /usr/local/lib/python3.12/dist-packages (1.9.0)
In [85]:
# Load the two ENAHO 2024 CSV files into DataFrames, decoding them as latin1.
# NOTE(review): both paths are absolute and point at /content/ (looks like a
# Colab session directory) — confirm the files exist there before running.
modulo_1 = pd.read_csv('/content/Enaho01-2024-100.csv', encoding='latin1')
modulo_32 = pd.read_csv('/content/Sumaria-2024.csv', encoding='latin1')

# Preview the first rows of the second file.
modulo_32.head()
Out[85]:
AÑO MES CONGLOME VIVIENDA HOGAR UBIGEO DOMINIO ESTRATO MIEPERHO TOTMIEHO ... ESTRSOCIAL LD LINPE LINEA POBREZA FACTOR07 LINEAV POBREZAV NCONGLOME SUB_CONGLOME
0 2024 1 15009 13 11 10101 4 4 2 2 ... 4 0.815806 231.437622 382.024597 3 79.816757 672.335510 4 7098 0
1 2024 1 15009 47 11 10101 4 4 3 3 ... 4 0.815806 231.437622 382.024597 3 79.816757 684.988831 4 7098 0
2 2024 1 15009 59 11 10101 4 4 1 1 ... 4 0.815806 231.437622 382.024597 3 79.816757 705.972351 4 7098 0
3 2024 1 15009 71 11 10101 4 4 2 2 ... 4 0.815806 231.437622 382.024597 3 79.816757 703.466370 4 7098 0
4 2024 1 15009 84 11 10101 4 4 5 5 ... 4 0.815806 231.437622 382.024597 3 79.816757 686.349243 3 7098 0

5 rows × 163 columns

i.ii Limpieza de data¶

In [86]:
# Columns kept from modulo_1: the household identifier columns plus the five
# NBI1..NBI5 indicator columns.
columnas_m1 = ['CONGLOME', 'VIVIENDA', 'HOGAR' , 'DOMINIO','NBI1', 'NBI2', 'NBI3',
               'NBI4', 'NBI5']
# Rebinds modulo_1 to the reduced frame; the remaining columns are dropped
# from this name from here on.
modulo_1 = modulo_1[columnas_m1]

# Show the remaining column names and a preview of the reduced frame.
print(modulo_1.columns)
modulo_1.head()
Index(['CONGLOME', 'VIVIENDA', 'HOGAR', 'DOMINIO', 'NBI1', 'NBI2', 'NBI3',
       'NBI4', 'NBI5'],
      dtype='object')
Out[86]:
CONGLOME VIVIENDA HOGAR DOMINIO NBI1 NBI2 NBI3 NBI4 NBI5
0 15006 13 11 4 0 0 0 0 0
1 15006 27 11 4 0 0 0 0 0
2 15006 50 11 4 0 0 0 0 0
3 15006 64 11 4 0 0 0 0 0
4 15006 76 11 4 0 0 0 0 0
In [87]:
# Columns kept from modulo_32: the household identifier columns plus the
# household size, earners, poverty, expenditure and income columns.
# NOTE(review): column meanings are inferred from their names — confirm
# against the ENAHO data dictionary.
columnas_m32 = ['CONGLOME', 'VIVIENDA', 'DOMINIO', 'HOGAR', 'MIEPERHO', 'PERCEPHO',
                'POBREZA', 'GASHOG2D', 'INGHOG2D']
# Rebinds modulo_32 to the reduced frame.
modulo_32 = modulo_32[columnas_m32]

# Show the remaining column names and a preview of the reduced frame.
print(modulo_32.columns)
modulo_32.head()
Index(['CONGLOME', 'VIVIENDA', 'DOMINIO', 'HOGAR', 'MIEPERHO', 'PERCEPHO',
       'POBREZA', 'GASHOG2D', 'INGHOG2D'],
      dtype='object')
Out[87]:
CONGLOME VIVIENDA DOMINIO HOGAR MIEPERHO PERCEPHO POBREZA GASHOG2D INGHOG2D
0 15009 13 4 11 2 2 3 34188.218750 52162.609375
1 15009 47 4 11 3 2 3 40164.945312 40832.042969
2 15009 59 4 11 1 1 3 12308.838867 15098.497070
3 15009 71 4 11 2 2 3 30316.724609 41082.953125
4 15009 84 4 11 5 2 3 33076.910156 47659.160156
In [88]:
def categoria(i):
  """Classify an income value into one of three labelled brackets.

  Args:
    i: Numeric income value (the notebook applies this to INGHOG2D).

  Returns:
    'Ingreso Bajo' when i < 13000, 'Ingreso Medio' when i < 35000,
    and 'Ingreso Alto' for every other value (including values that
    compare False against both limits, such as NaN).
  """
  # Exclusive upper limits, checked from lowest to highest.
  brackets = ((13000, 'Ingreso Bajo'), (35000, 'Ingreso Medio'))
  for upper_limit, label in brackets:
    if i < upper_limit:
      return label
  return 'Ingreso Alto'

# Derive the income-bracket label for every row from INGHOG2D and print the
# resulting column.
modulo_32['CATEGING'] = modulo_32['INGHOG2D'].apply(categoria)
print(modulo_32[['CATEGING']])
            CATEGING
0       Ingreso Alto
1       Ingreso Alto
2      Ingreso Medio
3       Ingreso Alto
4       Ingreso Alto
...              ...
33686   Ingreso Alto
33687   Ingreso Alto
33688   Ingreso Alto
33689   Ingreso Alto
33690  Ingreso Medio

[33691 rows x 1 columns]
In [89]:
def asistencia(i):
  """Translate an NBI4 flag into a school-attendance label.

  The flag is accepted as a number or as text, so the result does not
  depend on whether pandas parsed the column as integers, floats or
  strings (comparing only against the string '1' would silently label
  every numeric 1 as 'Asiste').

  Args:
    i: Flag value; anything numerically equal to 1 (1, 1.0, '1', ' 1 ').

  Returns:
    'No asiste' when the flag equals 1, otherwise 'Asiste'. Blank,
    missing or non-numeric values fall back to 'Asiste'.
  """
  try:
    flag_is_set = float(i) == 1
  except (TypeError, ValueError):
    # Blank strings, None and other non-numeric values are treated as unset.
    flag_is_set = False

  return 'No asiste' if flag_is_set else 'Asiste'

# Derive the attendance label for every row from NBI4 and print the
# resulting column.
modulo_1['CATEGNBI4'] = modulo_1['NBI4'].apply(asistencia)
print(modulo_1[['CATEGNBI4']])
      CATEGNBI4
0        Asiste
1        Asiste
2        Asiste
3        Asiste
4        Asiste
...         ...
44726    Asiste
44727    Asiste
44728    Asiste
44729    Asiste
44730    Asiste

[44731 rows x 1 columns]
In [90]:
# Combine both modules on the four shared identifier columns. pd.merge uses an
# inner join by default, so only key combinations present in both frames are
# kept.
# NOTE(review): consider passing validate='one_to_one' so duplicated keys
# raise instead of multiplying rows — confirm the keys are unique per frame.
enaho = pd.merge(modulo_1, modulo_32,
                 on=['CONGLOME', 'VIVIENDA', 'HOGAR', 'DOMINIO'])

# Preview the merged frame.
enaho.head()
Out[90]:
CONGLOME VIVIENDA HOGAR DOMINIO NBI1 NBI2 NBI3 NBI4 NBI5 CATEGNBI4 MIEPERHO PERCEPHO POBREZA GASHOG2D INGHOG2D CATEGING
0 15006 13 11 4 0 0 0 0 0 Asiste 2 2 3 12711.547852 12983.209961 Ingreso Bajo
1 15006 27 11 4 0 0 0 0 0 Asiste 3 3 2 8784.480469 8993.144531 Ingreso Bajo
2 15006 50 11 4 0 0 0 0 0 Asiste 4 4 3 44404.941406 127551.609375 Ingreso Alto
3 15006 64 11 4 0 0 0 0 0 Asiste 4 3 2 12542.700195 16807.876953 Ingreso Medio
4 15006 76 11 4 0 0 0 0 0 Asiste 2 1 3 17669.095703 17385.957031 Ingreso Medio
In [91]:
def region(d):
    """Translate a DOMINIO code into the name of its geographic region.

    Args:
        d: Domain code; codes 1 through 8 are recognised.

    Returns:
        The region name for the code, or None when `d` equals none of
        the eight known codes.
    """
    # Position in this tuple (counting from 1) is the domain code.
    region_names = ("Costa Norte", "Costa Centro", "Costa Sur",
        "Sierra Norte", "Sierra Centro", "Sierra Sur",
        "Selva", "Lima Metropolitana")

    for code, name in enumerate(region_names, start=1):
        if d == code:
            return name

    # Unknown codes yield None.
    return None

# Derive the region name for every row from DOMINIO and print the resulting
# column.
enaho['GEO'] = enaho['DOMINIO'].apply(region)
print(enaho[['GEO']])
                GEO
0      Sierra Norte
1      Sierra Norte
2      Sierra Norte
3      Sierra Norte
4      Sierra Norte
...             ...
33686         Selva
33687         Selva
33688         Selva
33689         Selva
33690         Selva

[33691 rows x 1 columns]
In [92]:
# Display the column index of the merged frame, including the derived columns.
enaho.columns
Out[92]:
Index(['CONGLOME', 'VIVIENDA', 'HOGAR', 'DOMINIO', 'NBI1', 'NBI2', 'NBI3',
       'NBI4', 'NBI5', 'CATEGNBI4', 'MIEPERHO', 'PERCEPHO', 'POBREZA',
       'GASHOG2D', 'INGHOG2D', 'CATEGING', 'GEO'],
      dtype='object')

Gráficos¶

In [93]:
# Grouped bar chart: number of households per income category, split by the
# school-attendance label.
grafico1 = alt.Chart(enaho, width=300, height=300).mark_bar().encode(
    x = alt.X("CATEGING:O",
              title = "Categoría de Ingresos",
              # Order the categories from low to high income; the default
              # ordering would be alphabetical (Alto, Bajo, Medio).
              sort = ["Ingreso Bajo", "Ingreso Medio", "Ingreso Alto"]),
    y = alt.Y("count()",
              title = "Número de Hogares"),
    color = alt.Color("CATEGNBI4:N",
                      title = "Condición de Asistencia"),
    # Offsetting by the same field draws the bars side by side within each
    # income category instead of stacking them.
    xOffset = alt.XOffset('CATEGNBI4:N')

).interactive().properties(
    title={
      "text": "Relación entre el Ingreso anual y Asistencia Escolar",
      "subtitle": "Gráfico de barras - Fuente: ENAHO 2024",
      "color": "Black",
      # "Light red" is not a valid CSS colour name, so it was ignored by the
      # renderer; "lightcoral" is a valid named light red.
      "subtitleColor": "lightcoral"
    }
)
grafico1
Out[93]:

El gráfico de barras agrupadas evidencia que, pese a las diferencias en el nivel de ingreso que puedan tener los hogares, los niños asisten a la escuela. El número de hogares con niños que no asisten a la escuela es menor a 500. A pesar de lo que intuitivamente se podría pensar, los hogares con ingresos anuales bajos son los que menor inasistencia presentan. Esto posiciona al acceso a la educación como un derecho exitoso en el territorio.

In [94]:
# Small-multiples point chart: one panel per geographic region showing the
# annual household expenditure of every household in that region.
# NOTE(review): GEO is used for x, color and column at once, so each panel
# only populates one x position — confirm this redundancy is intended.
grafico2 = alt.Chart(enaho).mark_point(filled = True).encode(
    x = alt.X("GEO:N", title='Ubicación geográfica'),
    y = alt.Y("GASHOG2D:Q", title='Gasto anual del hogar'),
    color = alt.Color("GEO:N", title='Ubicación geográfica',
            legend=alt.Legend(orient='bottom', titleOrient='left')
                     ),
    column = alt.Column("GEO:N")
    ).properties(width=300, height=400).interactive().properties(
    title={
      "text": "Relación entre la Ubicación geográfica y el Gasto anual del hogar",
      "subtitle": "Gráfico de multiples - Fuente: ENAHO 2024",
      "color": "Black",
      # "Light red" is not a valid CSS colour name, so it was ignored by the
      # renderer; "lightcoral" is a valid named light red.
      "subtitleColor": "lightcoral"
    }
)
grafico2
Out[94]:

El presente gráfico evidencia que Lima Metropolitana es la región que mayor gasto anual por hogar presenta. Por el contrario, la Sierra Norte es la región geográfica que tiene el menor gasto anual; ello lo indican sus datos atípicos, que son más bajos que los de sus congéneres de la misma región andina.

In [96]:
# Export this notebook to HTML with nbconvert.
# NOTE(review): the recorded output reports "pattern 'Trabajo_Final.ipynb'
# matched no files", so the export did not run — confirm the notebook's file
# name and that it is saved in the current working directory.
!jupyter nbconvert --to html 'Trabajo_Final.ipynb'
[NbConvertApp] WARNING | pattern 'Trabajo_Final.ipynb' matched no files
This application is used to convert notebook files (*.ipynb)
        to various other formats.

        WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.

Options
=======
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePreprocessor.enabled=True]
--allow-errors
    Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
    Equivalent to: [--ExecutePreprocessor.allow_errors=True]
--stdin
    read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
    Equivalent to: [--NbConvertApp.from_stdin=True]
--stdout
    Write notebook output to stdout instead of files.
    Equivalent to: [--NbConvertApp.writer_class=StdoutWriter]
--inplace
    Run nbconvert in place, overwriting the existing notebook (only
            relevant when converting to notebook format)
    Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory=]
--clear-output
    Clear output of current file and save in place,
            overwriting the existing notebook.
    Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --ClearOutputPreprocessor.enabled=True]
--coalesce-streams
    Coalesce consecutive stdout and stderr outputs into one stream (within each cell).
    Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --CoalesceStreamsPreprocessor.enabled=True]
--no-prompt
    Exclude input and output prompts from converted document.
    Equivalent to: [--TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True]
--no-input
    Exclude input cells and output prompts from converted document.
            This mode is ideal for generating code-free reports.
    Equivalent to: [--TemplateExporter.exclude_output_prompt=True --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True]
--allow-chromium-download
    Whether to allow downloading chromium if no suitable version is found on the system.
    Equivalent to: [--WebPDFExporter.allow_chromium_download=True]
--disable-chromium-sandbox
    Disable chromium security sandbox when converting to PDF..
    Equivalent to: [--WebPDFExporter.disable_sandbox=True]
--show-input
    Shows code input. This flag is only useful for dejavu users.
    Equivalent to: [--TemplateExporter.exclude_input=False]
--embed-images
    Embed the images as base64 dataurls in the output. This flag is only useful for the HTML/WebPDF/Slides exports.
    Equivalent to: [--HTMLExporter.embed_images=True]
--sanitize-html
    Whether the HTML in Markdown cells and cell outputs should be sanitized..
    Equivalent to: [--HTMLExporter.sanitize_html=True]
--log-level=<Enum>
    Set the log level by value or name.
    Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL']
    Default: 30
    Equivalent to: [--Application.log_level]
--config=<Unicode>
    Full path of a config file.
    Default: ''
    Equivalent to: [--JupyterApp.config_file]
--to=<Unicode>
    The export format to be used, either one of the built-in formats
            ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'qtpdf', 'qtpng', 'rst', 'script', 'slides', 'webpdf']
            or a dotted object name that represents the import path for an
            ``Exporter`` class
    Default: ''
    Equivalent to: [--NbConvertApp.export_format]
--template=<Unicode>
    Name of the template to use
    Default: ''
    Equivalent to: [--TemplateExporter.template_name]
--template-file=<Unicode>
    Name of the template file to use
    Default: None
    Equivalent to: [--TemplateExporter.template_file]
--theme=<Unicode>
    Template specific theme(e.g. the name of a JupyterLab CSS theme distributed
    as prebuilt extension for the lab template)
    Default: 'light'
    Equivalent to: [--HTMLExporter.theme]
--sanitize_html=<Bool>
    Whether the HTML in Markdown cells and cell outputs should be sanitized.This
    should be set to True by nbviewer or similar tools.
    Default: False
    Equivalent to: [--HTMLExporter.sanitize_html]
--writer=<DottedObjectName>
    Writer class used to write the
                                        results of the conversion
    Default: 'FilesWriter'
    Equivalent to: [--NbConvertApp.writer_class]
--post=<DottedOrNone>
    PostProcessor class used to write the
                                        results of the conversion
    Default: ''
    Equivalent to: [--NbConvertApp.postprocessor_class]
--output=<Unicode>
    Overwrite base name use for output files.
                Supports pattern replacements '{notebook_name}'.
    Default: '{notebook_name}'
    Equivalent to: [--NbConvertApp.output_base]
--output-dir=<Unicode>
    Directory to write output(s) to. Defaults
                                  to output to the directory of each notebook. To recover
                                  previous default behaviour (outputting to the current
                                  working directory) use . as the flag value.
    Default: ''
    Equivalent to: [--FilesWriter.build_directory]
--reveal-prefix=<Unicode>
    The URL prefix for reveal.js (version 3.x).
            This defaults to the reveal CDN, but can be any url pointing to a copy
            of reveal.js.
            For speaker notes to work, this must be a relative path to a local
            copy of reveal.js: e.g., "reveal.js".
            If a relative path is given, it must be a subdirectory of the
            current directory (from which the server is run).
            See the usage documentation
            (https://nbconvert.readthedocs.io/en/latest/usage.html#reveal-js-html-slideshow)
            for more details.
    Default: ''
    Equivalent to: [--SlidesExporter.reveal_url_prefix]
--nbformat=<Enum>
    The nbformat version to write.
            Use this to downgrade notebooks.
    Choices: any of [1, 2, 3, 4]
    Default: 4
    Equivalent to: [--NotebookExporter.nbformat_version]

Examples
--------

    The simplest way to use nbconvert is

            > jupyter nbconvert mynotebook.ipynb --to html

            Options include ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'qtpdf', 'qtpng', 'rst', 'script', 'slides', 'webpdf'].

            > jupyter nbconvert --to latex mynotebook.ipynb

            Both HTML and LaTeX support multiple output templates. LaTeX includes
            'base', 'article' and 'report'.  HTML includes 'basic', 'lab' and
            'classic'. You can specify the flavor of the format used.

            > jupyter nbconvert --to html --template lab mynotebook.ipynb

            You can also pipe the output to stdout, rather than a file

            > jupyter nbconvert mynotebook.ipynb --stdout

            PDF is generated via latex

            > jupyter nbconvert mynotebook.ipynb --to pdf

            You can get (and serve) a Reveal.js-powered slideshow

            > jupyter nbconvert myslides.ipynb --to slides --post serve

            Multiple notebooks can be given at the command line in a couple of
            different ways:

            > jupyter nbconvert notebook*.ipynb
            > jupyter nbconvert notebook1.ipynb notebook2.ipynb

            or you can specify the notebooks list in a config file, containing::

                c.NbConvertApp.notebooks = ["my_notebook.ipynb"]

            > jupyter nbconvert --config mycfg.py

To see all available configurables, use `--help-all`.